import numpy as np
import pandas as pd
# Read the trail catalogue and the user's preference sheet
trails = pd.read_csv("alltrailsexpanded.csv")
user = pd.read_csv("user input.csv")
# Remove columns that are not referenced below, in a single call
unused_columns = [
    "Unnamed: 0",
    "_geoloc",
    "units",
    "country_name",
    "features_not_coded",
    "activities_not_coded",
]
trails = trails.drop(columns=unused_columns)
# The CSV header misspells "latitude"; normalise it
trails = trails.rename(columns={"latitiude": "latitude"})
# Categorize columns: numeric_cols is assembled from three groups plus 'Grade',
# in the same order as before (general metrics, TF_* features, ACT_* activities).
_general_cols = [
    'Trail ID', 'latitude', 'longitude', 'popularity', 'length',
    'elevation_gain', 'difficulty_rating', 'visitor_usage', 'avg_rating',
    'num_reviews',
]
_feature_cols = [
    'TF_dogs-no', 'TF_forest', 'TF_historic-site', 'TF_kids', 'TF_dogs-leash',
    'TF_ada', 'TF_beach', 'TF_cave', 'TF_views', 'TF_river', 'TF_city-walk',
    'TF_lake', 'TF_rails-trails', 'TF_partially-paved', 'TF_dogs',
    'TF_wildlife', 'TF_paved',
]
_activity_cols = [
    'ACT_birding', 'ACT_hiking', 'ACT_backpacking', 'ACT_walking',
    'ACT_camping', 'ACT_nature-trips', 'ACT_off-road-driving',
    'ACT_road-biking', 'ACT_mountain-biking', 'ACT_scenic-driving',
    'ACT_bike-touring', 'ACT_fishing', 'ACT_snowshoeing', 'ACT_paddle-sports',
    'ACT_trail-running', 'ACT_horseback-riding', 'ACT_rock-climbing',
    'ACT_cross-country-skiing', 'ACT_sea-kayaking', 'ACT_fly-fish',
    'ACT_canoeing', 'ACT_whitewater-kayaking', 'ACT_skiing', 'ACT_surfing',
    'ACT_snowboarding', 'ACT_ice-climbing', 'ACT_rails-trails',
]
numeric_cols = _general_cols + _feature_cols + _activity_cols + ['Grade']
character_cols = ['name', 'area_name', 'city_name', 'state_name', 'route_type']
# Ensure types: parse every numeric column, then cast the text columns to str
trails = trails.assign(**{col: pd.to_numeric(trails[col]) for col in numeric_cols})
trails = trails.astype({col: str for col in character_cols})
# Create Trail Distance Variable
from math import radians, cos, sin, asin, sqrt
# Find user coords.
# The latitude and longitude are read from the rows labelled 54 and 55 of the
# user sheet, fourth column (position 3). Explicit .iloc is used for the column
# because integer keys on a string-labelled Series are a deprecated positional
# fallback in recent pandas.
# NOTE(review): the row labels 54/55 and column position 3 are hard-coded to the
# current layout of "user input.csv" - confirm if the sheet changes.
user_lat = float(user.loc[54].iloc[3])
user_long = float(user.loc[55].iloc[3])
def haversine(lon1, lat1, lon2, lat2, radius=3956):
    """Return the great-circle distance between two points given in degrees.

    Args:
        lon1, lat1: longitude and latitude of the first point, in degrees.
        lon2, lat2: longitude and latitude of the second point, in degrees.
        radius: sphere radius; the default 3956 is the Earth's radius in
            miles, so the result is in miles unless another radius is passed.

    Returns:
        The distance along the sphere's surface, in the units of ``radius``.
    """
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point round-off can push `a` a hair outside [0, 1] (e.g. for
    # antipodal points), which would make sqrt/asin raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return c * radius
# Distance from the user to every trail, rounded to two decimals.
# haversine takes (lon, lat) pairs and, with its default radius, returns miles.
distance_to_user = [
    round(haversine(trail_long, trail_lat, user_long, user_lat), 2)
    for trail_lat, trail_long in zip(trails['latitude'], trails['longitude'])
]
trails['Distance'] = distance_to_user
# Register the new column as numeric; guarded so re-running does not add it twice
if 'Distance' not in numeric_cols:
    numeric_cols.append('Distance')
print("User Location:",str(user_lat), str(user_long))
User Location: 37.282783 -113.309799
trail_info = trails[['Trail ID', 'name', 'length', 'difficulty_rating', 'Distance', 'area_name', 'latitude', 'longitude']]
# filter trails to meet required input
required_input = user[user['Class'] == 'Required']
valid_trails = trails
print("There are", str(len(valid_trails)), "valid trails.\n")
for index, row in required_input.iterrows():
    starting_len = len(valid_trails)
    currCol = row['Variable']
    currFilter = row['Filter']
    currValue = row['Value']
    print(currCol,currFilter,currValue)
    if currCol in numeric_cols:
        # float, not int: int() would truncate a threshold such as 3.5 (or
        # raise when the value arrives as the string "3.5").
        currValue = float(currValue)
    else:
        currValue = str(currValue)
    if currFilter == 'Equals':
        keep = valid_trails[currCol] == currValue
    elif currFilter == 'Greater Than':
        # Inclusive on purpose: a trail exactly at the threshold is kept.
        keep = valid_trails[currCol] >= currValue
    else:
        # Any other filter label is treated as an inclusive "Less Than".
        keep = valid_trails[currCol] <= currValue
    valid_trails = valid_trails[keep]
    ending_len = len(valid_trails)
    print("Filtered out",str(starting_len - ending_len),"trails.\n")
# Detach the result from `trails` so columns added to valid_trails later
# neither mutate the source frame nor trigger chained-assignment warnings.
valid_trails = valid_trails.copy()
print("After filtering, there are",str(len(valid_trails)),"valid trails.")
There are 3313 valid trails. popularity Greater Than 5 Filtered out 1194 trails. avg_rating Greater Than 3 Filtered out 0 trails. TF_views Equals 1 Filtered out 177 trails. ACT_hiking Equals 1 Filtered out 104 trails. ACT_off-road-driving Equals 0 Filtered out 9 trails. Distance Less Than 1000 Filtered out 463 trails. After filtering, there are 1366 valid trails.
# Select the user rows whose Class is 'Optional'
is_optional = user['Class'] == 'Optional'
optional_input = user[is_optional]
# This function computes the similarity value for one trail, based on column type
def simi_finder(trail, currCol, currValue):
    """Score how well one trail matches a single optional preference.

    Reads the module-level ``numeric_cols`` and ``valid_trails``.

    Args:
        trail: one row of ``valid_trails`` (indexable by column name).
        currCol: name of the column the preference applies to.
        currValue: the user's preferred value for that column.

    Returns:
        1 or 0 for text columns and 0/1 flag columns (exact match or not).
        For other numeric columns, a value of at least 0, rounded to two
        decimals, computed as 1 - |preferred - actual| / actual; it is 0 when
        the trail's value is 0.
    """
    # all character: exact string match
    if currCol not in numeric_cols:
        return 1 if trail[currCol] == str(currValue) else 0

    # all numeric; float() keeps fractional preferences that int() would truncate
    currValue = float(currValue)
    trail_value = trail[currCol]

    # binary field: every non-missing value in the column is 0 or 1.
    # (Checking only "does the column contain a 1" would misclassify count or
    # rating columns that merely happen to include the value 1.)
    # NOTE(review): this scans the column on every call; acceptable at the
    # current data size, hoist to the caller if it becomes a bottleneck.
    column_values = valid_trails[currCol].dropna()
    if column_values.isin([0, 1]).all():
        return 1 if trail_value == currValue else 0

    # continuous field (like length): relative closeness, floored at 0
    if trail_value == 0:
        return 0
    return max(0, round(1 - (abs(currValue - trail_value) / trail_value), 2))
# Work on an independent copy: valid_trails may be a filtered slice of (or the
# same object as) `trails`, and the loop below adds columns to it.
valid_trails = valid_trails.copy()
# Add one "<column>_similarity" column per optional preference
for index, row in optional_input.iterrows():
    currCol = row['Variable']
    currValue = row['Value']
    print(currCol, ' ', currValue)
    newColName = currCol + '_similarity'
    # A plain list (rather than DataFrame.apply) also works when no trails
    # survived filtering; values are assigned positionally, row by row.
    valid_trails[newColName] = [
        simi_finder(trail, currCol, currValue)
        for _, trail in valid_trails.iterrows()
    ]
length 5000 route_type out and back num_reviews 50 TF_historic-site 1 TF_cave 1 TF_river 1 TF_lake 1 TF_dogs 1 TF_paved 1 ACT_camping 1 ACT_mountain-biking 1 ACT_scenic-driving 1 ACT_trail-running 1
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session. It is kept because later cells may rely on it, but nothing in this
# cell needs it any more - consider removing it.
warnings.simplefilter(action='ignore', category=Warning)
# Copy so that adding 'score' does not write into a slice of valid_trails
similarity_scores = valid_trails.filter(regex='similarity').copy()
n_optional = len(optional_input)
if n_optional:
    # Percentage of the maximum attainable similarity, two decimals
    similarity_scores['score'] = (similarity_scores.sum(axis=1) / n_optional * 100).round(2)
else:
    # No optional criteria: every trail scores 0 instead of NaN (0 / 0)
    similarity_scores['score'] = 0.0
valid_trails['Score'] = similarity_scores['score']
trail_scores = valid_trails[['Trail ID', 'Score']]
# Inner join: trail_info keeps only the trails that survived filtering
trail_info = pd.merge(trail_info, trail_scores, on='Trail ID')
trail_info
| Trail ID | name | length | difficulty_rating | Distance | area_name | latitude | longitude | Score | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 10187810 | Bright Angel Trail to Bright Angel Campground ... | 28485.318 | 5 | 106.43 | Grand Canyon National Park | 36.05735 | -112.14381 | 25.00 |
| 1 | 10016964 | South Kaibab Trail to Cedar Ridge | 4988.954 | 3 | 108.70 | Grand Canyon National Park | 36.05346 | -112.08361 | 39.23 |
| 2 | 10237812 | Three-Mile Resthouse via Bright Angel Trail | 8690.436 | 5 | 106.44 | Grand Canyon National Park | 36.05701 | -112.14414 | 20.69 |
| 3 | 10245012 | South Kaibab, Phantom Ranch, and Bright Angel ... | 26875.978 | 5 | 108.70 | Grand Canyon National Park | 36.05344 | -112.08364 | 17.62 |
| 4 | 10265905 | South Kaibab Trail to Ooh Aah Point | 2896.812 | 3 | 108.71 | Grand Canyon National Park | 36.05309 | -112.08387 | 10.62 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1361 | 10003432 | Emma Matilda Lake Trail | 17702.740 | 3 | 477.38 | Grand Teton National Park | 43.87812 | -110.57270 | 17.54 |
| 1362 | 10293007 | Solitary Geyser | 2574.944 | 3 | 512.11 | Yellowstone National Park | 44.45921 | -110.82631 | 0.46 |
| 1363 | 10006242 | Dunraven Pass to Mount Washburn | 10943.512 | 5 | 538.82 | Yellowstone National Park | 44.78482 | -110.45348 | 20.15 |
| 1364 | 10235797 | Heart Lake and the Snake River Trails | 38624.160 | 5 | 505.88 | Yellowstone National Park | 44.31743 | -110.59815 | 31.77 |
| 1365 | 10010076 | Grebe Lake Trail | 10460.710 | 1 | 533.04 | Yellowstone National Park | 44.71791 | -110.54970 | 26.77 |
1366 rows × 9 columns
import plotly.express as px
import pandas as pd
# Two-stop colour ramp used for the Score colouring
color_scale = [(0, 'lightyellow'), (1,'darkgreen')]
# Scatter map of the scored trails: marker colour and size both follow Score
score_map_options = {
    "lat": "latitude",
    "lon": "longitude",
    "hover_name": "name",
    "hover_data": ["Score", "area_name", "Distance"],
    "color": "Score",
    "size": "Score",
    "color_continuous_scale": color_scale,
    "zoom": 3,
    "height": 800,
    "width": 800,
}
fig = px.scatter_mapbox(trail_info, **score_map_options)
# OpenStreetMap tiles, no margins around the figure
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()
trail_info[['name', 'Distance', 'length', 'difficulty_rating', 'Score']]
| name | Distance | length | difficulty_rating | Score | |
|---|---|---|---|---|---|
| 0 | Bright Angel Trail to Bright Angel Campground ... | 106.43 | 28485.318 | 5 | 25.00 |
| 1 | South Kaibab Trail to Cedar Ridge | 108.70 | 4988.954 | 3 | 39.23 |
| 2 | Three-Mile Resthouse via Bright Angel Trail | 106.44 | 8690.436 | 5 | 20.69 |
| 3 | South Kaibab, Phantom Ranch, and Bright Angel ... | 108.70 | 26875.978 | 5 | 17.62 |
| 4 | South Kaibab Trail to Ooh Aah Point | 108.71 | 2896.812 | 3 | 10.62 |
| ... | ... | ... | ... | ... | ... |
| 1361 | Emma Matilda Lake Trail | 477.38 | 17702.740 | 3 | 17.54 |
| 1362 | Solitary Geyser | 512.11 | 2574.944 | 3 | 0.46 |
| 1363 | Dunraven Pass to Mount Washburn | 538.82 | 10943.512 | 5 | 20.15 |
| 1364 | Heart Lake and the Snake River Trails | 505.88 | 38624.160 | 5 | 31.77 |
| 1365 | Grebe Lake Trail | 533.04 | 10460.710 | 1 | 26.77 |
1366 rows × 5 columns
# Summarise the scored trails per area
areas = trail_info['area_name'].unique()
area_columns = ['Area', 'Distance', 'Min Score', 'Mean Score', 'Max Score', 'Sum Score', 'Trail Count']
area_rows = []
# sort=False keeps areas in order of first appearance, like unique() above
for area, area_trails in trail_info.groupby('area_name', sort=False):
    scores = area_trails['Score']
    # The area's location is taken as the mean of its trails' coordinates
    area_lat = area_trails['latitude'].mean()
    area_long = area_trails['longitude'].mean()
    area_rows.append({
        'Area': area,
        'Distance': round(haversine(area_long, area_lat, user_long, user_lat), 2),
        'Min Score': scores.min(),
        'Mean Score': scores.mean(),
        'Max Score': scores.max(),
        'Sum Score': scores.sum(),
        'Trail Count': len(area_trails),
    })
# Build the frame once (no concat per area); the explicit column list keeps the
# sort below working even when there are no trails at all.
area_info = pd.DataFrame(area_rows, columns=area_columns)
area_info.sort_values(by=['Sum Score'], ascending=False).head(10)
| Area | Distance | Min Score | Mean Score | Max Score | Sum Score | Trail Count | |
|---|---|---|---|---|---|---|---|
| 0 | Rocky Mountain National Park | 462.56 | 0.00 | 22.890483 | 49.77 | 3319.12 | 145 |
| 0 | Yosemite National Park | 343.66 | 1.08 | 21.487482 | 42.08 | 2986.76 | 139 |
| 0 | Olympic National Park | 897.06 | 0.00 | 25.883874 | 49.23 | 2873.11 | 111 |
| 0 | Yellowstone National Park | 530.34 | 0.46 | 22.396634 | 43.62 | 2262.06 | 101 |
| 0 | Mount Rainier National Park | 787.80 | 7.69 | 23.379222 | 43.85 | 2104.13 | 90 |
| 0 | Glacier National Park | 786.65 | 4.85 | 24.132941 | 37.08 | 2051.30 | 85 |
| 0 | Grand Canyon National Park | 105.98 | 0.00 | 21.555806 | 53.85 | 1336.46 | 62 |
| 0 | Grand Teton National Park | 465.49 | 8.31 | 25.470385 | 44.69 | 1324.46 | 52 |
| 0 | Zion National Park | 17.59 | 2.46 | 19.540794 | 47.00 | 1231.07 | 63 |
| 0 | Sequoia National Park | 301.32 | 1.31 | 22.414340 | 46.46 | 1187.96 | 53 |
# Sort trails so the highest-scoring ones seed the expeditions
my_trails = trail_info.sort_values(by=['Score'], ascending=False)
# Determine parameters
exp_count = 1            # number of the expedition being built (1-based)
max_count = 10           # how many expeditions to build
max_exp_length = 5       # maximum trails per expedition, seed included
user_max_distance = 50   # longest allowed hop between consecutive trails
expedition_rows = []
# One expedition per top-scoring trail, greedily extended hop by hop
for index, row in my_trails.iterrows():
    # Find base trail information
    curr_trail = row['name']
    curr_lon = row['longitude']
    curr_lat = row['latitude']
    expedition_rows.append({'Trail': curr_trail, 'Distance': 0, 'Exp Num': exp_count, 'Exp Order': 1})
    trail_count = 1
    exp_trails = [curr_trail]
    # Append trails for expedition
    while trail_count < max_exp_length:
        # Take current expedition trails out of search.
        # NOTE(review): exclusion is by trail name, so distinct trails that
        # share a name are excluded together.
        temp_df = trail_info[~trail_info['name'].isin(exp_trails)]
        # Distance from the current trail to each remaining candidate
        hop_distances = [
            haversine(curr_lon, curr_lat, cand_lon, cand_lat)
            for cand_lon, cand_lat in zip(temp_df['longitude'], temp_df['latitude'])
        ]
        temp_df = temp_df.assign(**{'Distance to currTrail': hop_distances})
        # Rank candidates by score per unit of distance
        temp_df = temp_df.assign(**{'Expedition Score': temp_df['Score'] / temp_df['Distance to currTrail']})
        temp_df = temp_df.sort_values(by=['Expedition Score'], ascending=False)
        # Candidates closer than 2 distance units are skipped
        temp_df = temp_df[temp_df['Distance to currTrail'] >= 2]
        # Find new trail to add
        new_trail = temp_df.head(1)
        if new_trail.empty:
            # Nothing left to choose from
            break
        best = new_trail.iloc[0]
        # NOTE(review): only the top-ranked candidate is checked against
        # user_max_distance; if it is too far the expedition ends even when a
        # lower-ranked candidate is within range. Kept as originally written.
        if best['Distance to currTrail'] > user_max_distance:
            break
        trail_count += 1
        expedition_rows.append({'Trail': best['name'], 'Distance': best['Distance to currTrail'],
                                'Exp Num': exp_count, 'Exp Order': trail_count})
        # Continue the search from the trail just added
        curr_trail = best['name']
        exp_trails.append(curr_trail)
        curr_lon = best['longitude']
        curr_lat = best['latitude']
    # Check expedition count
    if max_count == exp_count:
        break
    exp_count += 1
# Build the result once; the explicit columns keep later merges on 'Trail'
# working even if no expedition was produced.
expeditions = pd.DataFrame(expedition_rows, columns=['Trail', 'Distance', 'Exp Num', 'Exp Order'])
# Attach each trail's details to the expedition rows, joining on the trail name
detail_columns = ['name','length','difficulty_rating','Distance','area_name','Score','latitude','longitude']
info_to_join = trail_info[detail_columns].rename(
    columns={"name": "Trail", "Distance": "Distance from User"}
)
expeditions = expeditions.merge(info_to_join, on='Trail')
# Categorical order gives one discrete colour per stop; constant marker size
expeditions['Exp Order'] = pd.Categorical(expeditions['Exp Order'])
expeditions['Map Size'] = 5
# Plot a single expedition, chosen by its number
user_exp_to_plot = 3
is_selected = expeditions['Exp Num'] == user_exp_to_plot
exp_to_plot = expeditions[is_selected]
expedition_map_options = {
    "lat": "latitude",
    "lon": "longitude",
    "hover_name": "Trail",
    "hover_data": ["Score","length","difficulty_rating","area_name"],
    "color": "Exp Order",
    "size": 'Map Size',
    "color_discrete_sequence": px.colors.qualitative.Plotly,
    "zoom": 10,
    "height": 800,
    "width": 800,
}
fig = px.scatter_mapbox(exp_to_plot, **expedition_map_options)
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()